def gold2crfpp(file_name):
    """Convert a space-segmented text file into character-level B/I tag rows.

    Reads ``../data/<file_name>.utf8`` and writes ``data/<file_name>.txt``
    (both paths are relative to the current working directory). Each input
    line is split on single spaces into words, and every character of a word
    is written on its own row as ``<char> B`` for the first character of the
    word or ``<char> I`` for any later character. One blank row is written
    after each input line, including the empty final element produced when
    the input ends with a newline.

    Args:
        file_name: Base name of the file, without directory or extension.

    Raises:
        OSError: If the input file cannot be opened or the output file
            cannot be created (for example, ``data/`` does not exist).
        UnicodeDecodeError: If the input is not valid UTF-8.
    """
    # The input is UTF-8 (per its extension); state the encoding explicitly
    # so the result does not depend on the platform's locale default.
    with open('../data/' + file_name + '.utf8', encoding='utf-8') as f:
        all_data = f.read().split('\n')
    with open('data/' + file_name + '.txt', 'w', encoding='utf-8') as f:
        for line in all_data:
            # Consecutive spaces produce empty "words", which emit no rows.
            for word in line.split(' '):
                f.writelines(
                    char + (' B\n' if i == 0 else ' I\n')
                    for i, char in enumerate(word)
                )
            # Blank row marks the end of the current input line.
            f.write('\n')

if __name__ == '__main__':
    # Base names of the gold files to convert and then merge; defined once so
    # the conversion and merge steps cannot drift apart.
    file_names = ['pku_test_gold', 'msr_test_gold']
    for file_name in file_names:
        gold2crfpp(file_name)
    # Concatenate the converted files, in list order, into data/all_data.txt.
    # This is a plain byte-for-byte copy, so binary mode is used: it avoids
    # decoding and re-encoding the text through the locale's default encoding.
    with open('data/all_data.txt', 'wb') as all_data_file:
        for file_name in file_names:
            with open('data/' + file_name + '.txt', 'rb') as f:
                all_data_file.write(f.read())